import os
from bs4 import BeautifulSoup

# Input HTML page to parse and the location of the generated text report.
file_path = r"C:\Users\39737\Desktop\photograpy\douban.html"
output_dir = r"C:\Users\39737\Desktop\photograpy"
output_file = os.path.join(output_dir, "movies.txt")


def _text(node):
    """Return the text of *node*, or "" when the lookup found nothing."""
    return node.get_text() if node is not None else ""


def _attr(node, name):
    """Return attribute *name* of *node*, or "" if the node or attribute is missing."""
    if node is None:
        return ""
    return node.get(name) or ""


def _descend(node, *steps):
    """Follow a chain of ``find`` lookups, stopping at the first miss.

    Each step is a ``(tag_name, css_class)`` pair; ``css_class`` may be
    ``None`` to match on the tag name only. Returns the final element, or
    ``None`` as soon as any lookup in the chain finds nothing.
    """
    for tag_name, css_class in steps:
        if node is None:
            return None
        # Only pass class_ when a class is wanted, so a plain tag lookup
        # stays an unfiltered ``find(tag_name)`` like the original chains.
        if css_class is None:
            node = node.find(tag_name)
        else:
            node = node.find(tag_name, class_=css_class)
    return node


def extract_movie(movie):
    """Collect the fields of one ``<li>`` movie entry into a dict.

    Missing elements or attributes yield empty strings instead of raising,
    so one malformed entry does not abort the whole export.

    Returns a dict with the keys ``title``, ``rating``, ``comment``,
    ``directors``, ``link`` and ``picture``.
    """
    # Look up the shared containers once instead of once per field.
    body = _descend(movie, ('div', 'bd'))
    rating_box = _descend(body, ('div', None))
    anchor = _descend(movie, ('div', 'item'), ('div', 'pic'), ('a', None))

    # The comment count is taken from the last <span> of the rating box.
    spans = rating_box.find_all('span') if rating_box is not None else []

    return {
        "title": _text(_descend(movie, ('div', 'hd'), ('span', 'title'))),
        "rating": _text(_descend(rating_box, ('span', 'rating_num'))),
        "comment": _text(spans[-1]) if spans else "",
        "directors": _text(_descend(body, ('p', None))),
        "link": _attr(anchor, 'href'),
        "picture": _attr(_descend(anchor, ('img', None)), 'src'),
    }


def format_movie(info):
    """Render one movie dict (see ``extract_movie``) as a text record.

    The record is six labelled lines followed by a 50-dash separator line.
    """
    return (
        f"Title: {info['title']}\n"
        f"Rating: {info['rating']}\n"
        f"Comment: {info['comment']}\n"
        f"Directors: {info['directors']}\n"
        f"Link: {info['link']}\n"
        f"Picture: {info['picture']}\n"
        + "-" * 50 + "\n"
    )


# Make sure the input is an existing regular file before trying to read it.
if not os.path.isfile(file_path):
    print(f"File not found: {file_path}")
else:
    # Read the whole page into memory.
    with open(file_path, "r", encoding="utf-8") as f:
        html = f.read()

    # Parse the HTML with BeautifulSoup.
    soup = BeautifulSoup(html, 'html.parser')

    # Locate the movie list; find() returns None when the page has no such list.
    grid = soup.find('ol', class_='grid_view')
    if grid is None:
        print(f"No movie list (ol.grid_view) found in {file_path}")
    else:
        movie_list = grid.find_all('li')

        # Build every record before touching the output file, so a failure
        # during extraction cannot leave a truncated or half-written report.
        records = [format_movie(extract_movie(movie)) for movie in movie_list]

        with open(output_file, "w", encoding="utf-8") as f:
            f.writelines(records)

        print(f"Data has been saved to {output_file}")